#install.packages("MCMCpack", repos="http://cran.r-project.org", lib="~/R_libs")
#install.packages("coda", repos="http://cran.r-project.org", lib="~/R_libs")
#install.packages("mcmcplots", repos="http://cran.r-project.org", lib="~/R_libs")
#install.packages("xtable", repos="http://cran.r-project.org", lib="~/R_libs")
#install.packages("R2HTML", repos="http://cran.r-project.org", lib="~/R_libs")

# Start from an empty workspace and fix the RNG seed so that the random
# choices made further down (anchors, test subsample) are reproducible.
rm(list = ls())
set.seed(12345)

######### FLAGS  !!!!!!!!!!!!!!!!!!!!!!!
testing <- FALSE   # TRUE: quick run on a PC; FALSE: full run on SGE
savemcmc <- FALSE  # TRUE: also write the raw MCMC draws to CSV files

tau <- 1   # precision (passed to MCMCirt1d as AB0)
thin <- 5  # thinning interval of the chain

# Chain length and burn-in: tiny values for a test run, full length otherwise.
if (testing) {
  niter <- 25 * thin
  nburnin <- 5
} else {
  niter <- 10000 * thin
  nburnin <- 5000
}

# Test runs load packages from the default library paths (lib.loc = NULL);
# all other runs load them from the user-level library in ~/R_libs.
pkg_lib <- if (testing) NULL else "~/R_libs"
library(MCMCpack, lib.loc = pkg_lib)
library(coda, lib.loc = pkg_lib)
library(mcmcplots, lib.loc = pkg_lib)
library(xtable, lib.loc = pkg_lib)
library(R2HTML, lib.loc = pkg_lib)

# Working directory that holds the input CSV: a local Dropbox folder for
# test runs, the project folder under the home directory otherwise.
if (testing) {
  # setwd("N:/")
  setwd("C:/Data/Dropbox/Research/2015- After the Uprisings/Analysis/")
} else {
  setwd("~/Uprisings23")
}

# Load the survey file. `size` is the number of respondents (rows); later
# sections use it as the number of theta columns in the MCMC output.
data <- read.csv("IRTdemocracy23_all.csv", header = TRUE)
size <- nrow(data)

#### dataset for testing
# Test runs work on a random subsample of at most 2000 respondents, drawn
# without replacement. The sample size is capped at the number of rows so
# that sample() cannot fail when the file is smaller than 2000 rows.
if (testing) {
  size <- min(2000, nrow(data))
  index <- sample(x = nrow(data), size = size, replace = FALSE)
  data <- data[index, ]
  rownames(data) <- NULL  # renumber rows 1..size
  # Test output is written to the IRT subfolder.
  setwd("C:/Data/Dropbox/Research/2015- After the Uprisings/Analysis/IRT")
}
#### end dataset for testing



########################################################################
########################################################################
#Calculate Anchors: Most and Least Extreme Respondents
########################################################################
########################################################################

# Raw score of each respondent: the number of positive answers over the item
# columns (every column to the left of "wave"). This is valid because the
# items have been recoded so that 1 is the answer more supportive of democracy.
lastitem <- which(colnames(data) == "wave") - 1
score <- rowSums(data[, 1:lastitem], na.rm = TRUE)
lowest <- min(score)
highest <- max(score)

# Most and least extreme respondents: one respondent is picked at random
# among those tied at each extreme and labelled "V<row number>".
# NOTE(review): the "V" prefix presumably matches the default subject labels
# used by MCMCirt1d (see the "theta.V" clean-up further down) -- confirm.
rows_lowest <- which(score == lowest)
rows_highest <- which(score == highest)
leastsupportive <- paste0("V", rows_lowest[sample(length(rows_lowest), 1)])
mostsupportive <- paste0("V", rows_highest[sample(length(rows_highest), 1)])

# Constraint list for the sampler: "-" for the least supportive respondent,
# "+" for the most supportive one.
constraints <- setNames(list("-", "+"), c(leastsupportive, mostsupportive))

########################################################################
########################################################################
#1-Dimensional, non-hierarchical IRT
########################################################################
########################################################################

# Fit the one-dimensional IRT model on the item columns only, storing both
# the respondent abilities (theta) and the item parameters in the chain.
irt.data <- data[, 1:lastitem]
irt.mcmc <- MCMCirt1d(
  irt.data,
  theta.constraints = constraints,
  burnin = nburnin,
  mcmc = niter,
  thin = thin,
  seed = 12345,
  t0 = 0,
  T0 = 1,
  ab0 = 0,
  AB0 = tau,
  store.item = TRUE,
  store.ability = TRUE
)

# Optionally dump the raw draws: the first `size` columns go to the theta
# file, every remaining column to the alpha/beta file.
if (savemcmc) {
  theta_cols <- 1:size
  item_cols <- seq(from = (size + 1), to = ncol(irt.mcmc))
  write.csv(x = irt.mcmc[, theta_cols], file = "Theta MCMC.csv", row.names = FALSE)
  write.csv(x = irt.mcmc[, item_cols], file = "Alpha-Beta MCMC.csv", row.names = FALSE)
}


####################################
#CHECK ANCHOR
####################################

# Visual check of the anchors: plot the 2.5%-97.5% interval of theta for up
# to 15 randomly chosen respondents at each extreme of the raw score.
interval95 <- list(outer = c(0.025, 0.975), inner = c(0.025, 0.975))

# Respondents with the highest raw score.
rows_top <- which(score == highest)
index <- rows_top[sample(length(rows_top), min(length(rows_top), 15))]
pdf("Theta Most Supportive.pdf")
caterplot(irt.mcmc[, index], quantiles = interval95, col = "red", style = "plain")
dev.off()

# Respondents with the lowest raw score.
rows_bottom <- which(score == lowest)
index <- rows_bottom[sample(length(rows_bottom), min(length(rows_bottom), 15))]
pdf("Theta Least Supportive.pdf")
caterplot(irt.mcmc[, index], quantiles = interval95, col = "red", style = "plain")
dev.off()

#if the function failed to provide proper anchor
#sum.high = summary(irt.mcmc[,which(score==highest)])$statistics[,1]
#sum.low = summary(irt.mcmc[,which(score==lowest)])$statistics[,1]
#reversed = mean(sum.high)<mean(sum.low)
#if (reversed) { irt.mcmc = -1 * irt.mcmc
#				alpha = seq(from=size+1,to=ncol(irt.mcmc),by=2)
#				irt.mcmc[,alpha] = -1 * irt.mcmc[,alpha]    #alpha doesn't need to be reversed				
#}


####################################
#ALPHA-BETA CODA
####################################

# Column layout used throughout: the item parameters follow the `size` theta
# columns; columns size+1, size+3, ... are treated as alpha and columns
# size+2, size+4, ... as beta.
item_cols <- seq(from = size + 1, to = ncol(irt.mcmc))
alpha_cols <- seq(from = size + 1, to = ncol(irt.mcmc), by = 2)
beta_cols <- seq(from = size + 2, to = ncol(irt.mcmc), by = 2)
interval95 <- list(outer = c(0.025, 0.975), inner = c(0.025, 0.975))

# Alpha: parameter names sorted by posterior mean, smallest first.
alpha_stats <- summary(irt.mcmc[, alpha_cols])$statistics
alpha <- rownames(alpha_stats)[order(alpha_stats[, 1])]

pdf("Alpha Plot.pdf")
caterplot(irt.mcmc[, alpha], quantiles = interval95, style = "plain", col = "red")
dev.off()

# Beta: same ordering rule.
beta_stats <- summary(irt.mcmc[, beta_cols])$statistics
beta <- rownames(beta_stats)[order(beta_stats[, 1])]

pdf("Beta Plot.pdf")
caterplot(irt.mcmc[, beta], quantiles = interval95, style = "plain", col = "red")
dev.off()

# Convergence diagnostics over all item parameters (alpha and beta together).
item_draws <- irt.mcmc[, item_cols]

# AUTO-CORRELATION
pdf("Alpha-Beta Plot AutoCorr.pdf")
autocorr.plot(item_draws, lag.max = 75)
dev.off()

pdf("Alpha-Beta Plot Geweke.pdf")
geweke.plot(item_draws)
dev.off()

HTML(geweke.diag(item_draws), file = "Alpha-Beta Geweke.html", append = FALSE)


####################################
#SUMMARY STATISTICS
####################################

## Alpha-Beta
# summary() on a chain is expensive, so each parameter set is summarised once
# and both the $statistics and the $quantiles tables are taken from that
# single result.
alpha_summary <- summary(irt.mcmc[, seq(from = size + 1, to = ncol(irt.mcmc), by = 2)])
beta_summary <- summary(irt.mcmc[, seq(from = size + 2, to = ncol(irt.mcmc), by = 2)])

# Moments: one row per item, alpha columns followed by beta columns.
sum.alpha <- as.matrix(alpha_summary$statistics)
sum.beta <- as.matrix(beta_summary$statistics)
items <- data.frame(colnames(irt.data), sum.alpha, sum.beta)
colnames(items) <- c("Item", "A Mean", "A SD", "A Naive SE", "A TS SE",
                     "B Mean", "B SD", "B Naive SE", "B TS SE")
write.csv(items, "Alpha-Beta Statistics.csv", row.names = FALSE)

# Quantiles: same layout.
sum.alpha <- as.matrix(alpha_summary$quantiles)
sum.beta <- as.matrix(beta_summary$quantiles)
items <- data.frame(colnames(irt.data), sum.alpha, sum.beta)
colnames(items) <- c("Item", "A 2.5", "A 25", "A 50", "A 75", "A 97.5",
                     "B 2.5", "B 25", "B 50", "B 75", "B 97.5")
write.csv(items, "Alpha-Beta Quantiles.csv", row.names = FALSE)


## Thetas
theta_summary <- summary(irt.mcmc[, 1:size])

# Strip the leading "theta.V" so that row names are the bare respondent
# numbers. The pattern is anchored and the dot escaped so that only that
# literal prefix is removed.
sum.theta <- as.matrix(theta_summary$statistics)
row.names(sum.theta) <- sub("^theta\\.V", "", row.names(sum.theta))
write.csv(sum.theta, "Theta Statistics.csv")

sum.theta <- as.matrix(theta_summary$quantiles)
row.names(sum.theta) <- sub("^theta\\.V", "", row.names(sum.theta))
write.csv(sum.theta, "Theta Quantiles.csv")



################################################################################
#PREDICTION FOR GOODNESS OF FIT
################################################################################

# Posterior means of the respondent abilities (X) and of the item parameters.
X <- summary(irt.mcmc[, 1:size])$statistics[, 1]
alpha <- summary(irt.mcmc[, seq(from = size + 1, to = ncol(irt.mcmc), by = 2)])$statistics[, 1]
beta <- summary(irt.mcmc[, seq(from = size + 2, to = ncol(irt.mcmc), by = 2)])$statistics[, 1]

observed <- as.matrix(irt.data)
n_items <- ncol(observed)

# Predicted answer per respondent and item: 1 when the fitted probability
# pnorm(theta * beta - alpha) is above 0.5, otherwise 0.
Y <- matrix(NA, nrow = nrow(observed), ncol = n_items)
for (i in seq_len(n_items)) {
  Y[, i] <- as.numeric(pnorm(X * beta[i] - alpha[i]) > 0.5)
}

# Confusion counts per item. Missing answers give NA in the comparison and
# are dropped by na.rm, so they count towards none of the four cells.
truepos <- colSums(Y == 1 & observed == 1, na.rm = TRUE)
trueneg <- colSums(Y == 0 & observed == 0, na.rm = TRUE)
falsepos <- colSums(Y == 1 & observed == 0, na.rm = TRUE)
falseneg <- colSums(Y == 0 & observed == 1, na.rm = TRUE)
denom <- colSums(!is.na(observed))  # number of non-missing answers per item

accuracy <- cbind(
  denom,
  truepos, truepos / denom,
  trueneg, trueneg / denom,
  falsepos, falsepos / denom,
  falseneg, falseneg / denom,
  truepos + trueneg, (truepos + trueneg) / denom,
  falsepos + falseneg, (falsepos + falseneg) / denom
)
colnames(accuracy) <- c("Denominator", "TruePos", "% TruePos", "TrueNeg", "% TrueNeg",
                        "FalsePos", "% FalsePos", "FalseNeg", "% FalseNeg",
                        "Total Correct", "% Correct", "Total Incorrect", "% Incorrect")

# "Total" row: count columns are summed over items; the share columns are
# replaced by the plain mean of the per-item shares.
accuracy <- rbind(accuracy, colSums(accuracy))
share_cols <- c(3, 5, 7, 9, 11, 13)
item_rows <- seq_len(nrow(accuracy) - 1)  # every row except the total
accuracy[nrow(accuracy), share_cols] <-
  colMeans(accuracy[item_rows, share_cols, drop = FALSE])

row.names(accuracy) <- c(colnames(irt.data), "Total")

accuracy <- round(accuracy, digits = 2)
write.csv(accuracy, file = "Accuracy.csv")


# Bar chart of the share of correct predictions. barplot() returns the bar
# midpoints, which are used to place the value labels above the bars.
pdf("Accuracy.pdf", width = 14)
bar_mid <- barplot(accuracy[, 11], main = "% Correct Prediction", ylim = c(0, 1))
text(x = bar_mid, y = accuracy[, 11] + 0.03, labels = accuracy[, 11])
dev.off()



################################################################################
#BY COUNTRY ANALYSIS
################################################################################

# Labels for the country-wave groups, in ascending order of the numeric
# country-wave code built below.
countrynames <- c("Algeria2", "Algeria3", "Egypt2", "Egypt3",
                  "Iraq2", "Iraq3", "Jordan2", "Jordan3", "Lebanon2", "Lebanon3",
                  "Palestine2", "Palestine3", "Sudan2", "Sudan3",
                  "Tunisia2", "Tunisia3", "Yemen2", "Yemen3")

# Respondent-level columns (everything after the items) plus a combined
# country-wave code: country * 10 + wave.
data2 <- data.frame(data[, (lastitem + 1):ncol(data)], data$country * 10 + data$wave)
colnames(data2)[ncol(data2)] <- "countrywave"

weight <- data2$weight
countrylist <- sort(unique(data2$countrywave))
numcountry <- length(countrylist)

# Fail early with a readable message if the data and the labels disagree.
if (numcountry != length(countrynames)) {
  stop("Found ", numcountry, " country-wave codes in the data but ",
       length(countrynames), " labels in countrynames", call. = FALSE)
}

# One column per country-wave, one row per MCMC draw:
# unweighted and survey-weighted mean of theta.
countrymat <- matrix(NA, nrow = nrow(irt.mcmc), ncol = numcountry)
countrymat.wgt <- matrix(NA, nrow = nrow(irt.mcmc), ncol = numcountry)
colnames(countrymat) <- countrynames
colnames(countrymat.wgt) <- countrynames


## Draw Country Distributions
# Common x-axis range for all pages, from the posterior means of the chain's
# columns. colMeans() gives these means directly, without the cost of a full
# summary() of the chain.
# NOTE(review): irt.mcmc also holds the item-parameter columns, so the range
# covers those as well -- confirm this is intended.
param_means <- colMeans(irt.mcmc)
x.max <- max(param_means)
x.min <- min(param_means)

pdf("Country Distributions.pdf")
for (i in seq_len(numcountry)) {
  index <- which(data2$countrywave == countrylist[i])
  # drop = FALSE keeps a one-column matrix when a group has one respondent,
  # so the same code path works in every run mode.
  votermat <- irt.mcmc[, index, drop = FALSE]

  countrymat[, i] <- rowMeans(votermat)
  countrymat.wgt[, i] <- (votermat %*% weight[index]) / sum(weight[index])

  # Posterior mean of theta for every respondent in the group.
  X <- colMeans(votermat)
  if (length(X) < 2) {
    # density() needs at least two points; the aggregates above are still
    # filled in, only the distribution page is skipped.
    warning("Skipping distribution plot for ", countrynames[i],
            ": fewer than 2 respondents", call. = FALSE)
    next
  }

  d <- density(X)
  max.d <- max(d$y)
  max.h <- max(hist(X, plot = FALSE)$density)
  # Empty frame first, then density curve, histogram, and a dashed line at
  # the mean of the weighted group aggregate.
  plot(d$x, rep(NA, length(d$x)),
       xlim = c(x.min - .01, x.max + .01),
       ylim = c(0, max(max.h, max.d) + .01),
       xlab = "Conservatism Score", ylab = "Density", main = countrynames[i])
  lines(d, lwd = 2)
  hist(X, add = TRUE, prob = TRUE, lwd = 1)
  abline(v = mean(countrymat.wgt[, i]), lty = 2, lwd = 2)
}
dev.off()


## add for overall wave
# Unweighted mean of theta over all respondents of wave 2 and of wave 3,
# computed separately for every MCMC draw (one row per draw).
wavemat <- matrix(NA, nrow = nrow(irt.mcmc), ncol = 2,
                  dimnames = list(NULL, c("wave2", "wave3")))

for (wave in 2:3) {
  in_wave <- which(data2$wave == wave)
  wavemat[, wave - 1] <- rowMeans(irt.mcmc[, in_wave])
}


## summary statistics and plot for aggregates
# Combine the country-wave columns with the two wave columns. The wave
# columns of `wavemat` are appended unchanged to both chains.
countrymcmc <- as.mcmc(data.frame(countrymat, wavemat))
countrymcmc.wgt <- as.mcmc(data.frame(countrymat.wgt, wavemat))
if (savemcmc) {
  write.csv(x = countrymcmc, file = "Country MCMC.csv", row.names = FALSE)
  write.csv(x = countrymcmc.wgt, file = "Country Weighted MCMC.csv", row.names = FALSE)
}

# Interval plot in the given column order (reorder = FALSE).
pdf("Country Weighted Plot.pdf")
caterplot(countrymcmc.wgt, reorder = FALSE,
          quantiles = list(outer = c(0.025, 0.975), inner = c(0.025, 0.975)),
          col = "red", style = "plain")
dev.off()

pdf("Country Weighted Plot Summary.pdf")
plot(countrymcmc.wgt)
dev.off()

pdf("Country Weighted Plot AutoCorr.pdf")
autocorr.plot(countrymcmc.wgt, lag.max = 75)
dev.off()

pdf("Country Weighted Geweke Plot.pdf")
geweke.plot(countrymcmc.wgt)
dev.off()

HTML("<b>COUNTRY WEIGHTED GEWEKE</b>", file = "Country Weighted Geweke.html", append = FALSE)
HTML(geweke.diag(countrymcmc.wgt), file = "Country Weighted Geweke.html", append = TRUE)

# Summarise the weighted chain once; both tables below come from this result.
wgt_summary <- summary(countrymcmc.wgt)

# DESCRIPTIVE STATISTICS
wgt_stats <- as.matrix(wgt_summary$statistics)
items <- data.frame(c(countrynames, "wave2", "wave3"), wgt_stats)
write.csv(items, "Country Weighted Statistics.csv", row.names = FALSE)

# QUANTILES
wgt_quants <- as.matrix(wgt_summary$quantiles)
items <- data.frame(c(countrynames, "wave2", "wave3"), wgt_quants)
write.csv(items, "Country Weighted Quantiles.csv", row.names = FALSE)

# Free the large intermediate objects.
rm(countrymat, countrymat.wgt, countrymcmc, countrymcmc.wgt)



#######################################################
#GENERIC FUNCTION TO BE USED FOR OTHER AGGREGATIONS
#######################################################

# Average the respondent-level theta draws within groups and write the
# resulting group-level chain's plots, diagnostics and tables to disk.
#
# NOTE: defining this function masks stats::aggregate() in the session.
#
# Args:
#   names:      labels of the groups, one per element of `codes`, same order.
#   codes:      group codes to look up in `datagroup`.
#   datagroup:  group code of every respondent; element k is matched to
#               column k of `chain`.
#   grouping:   prefix of every output file name.
#   chain:      MCMC output whose leading columns are the respondents' theta
#               draws; defaults to the global `irt.mcmc`.
#   save_draws: if TRUE, also write the group-level draws to CSV; defaults
#               to the global `savemcmc`.
#
# Returns NULL invisibly; the function is called for its side effects
# (PDF, HTML and CSV files named "<grouping> ...").
aggregate <- function(names, codes, datagroup, grouping,
                      chain = irt.mcmc, save_draws = savemcmc) {
   if (length(names) != length(codes)) {
      stop("'", grouping, "': ", length(names), " labels supplied for ",
           length(codes), " group codes", call. = FALSE)
   }

   mat <- matrix(NA, nrow = nrow(chain), ncol = length(codes))
   colnames(mat) <- names

   for (i in seq_along(codes)) {
      index <- which(datagroup == codes[i])
      if (length(index) == 0) {
         warning("'", grouping, "': no respondents with code ", codes[i],
                 "; column '", names[i], "' will be NaN", call. = FALSE)
      }
      # drop = FALSE keeps a matrix even for a one-respondent group, which
      # rowMeans() requires.
      mat[, i] <- rowMeans(chain[, index, drop = FALSE])
   }

   if (save_draws) write.csv(x = mat, file = paste(grouping, "MCMC.csv"), row.names = FALSE)
   group_chain <- as.mcmc(mat)

   # Interval plot in the given column order (reorder = FALSE).
   pdf(paste(grouping, "Plot.pdf"))
   caterplot(group_chain, reorder = FALSE,
             quantiles = list(outer = c(0.025, 0.975), inner = c(0.025, 0.975)),
             col = "red", style = "plain")
   dev.off()

   pdf(paste(grouping, "Plot Summary.pdf"))
   plot(group_chain)
   dev.off()

   pdf(paste(grouping, "Plot AutoCorr.pdf"))
   autocorr.plot(group_chain, lag.max = 75)
   dev.off()

   pdf(paste(grouping, "Geweke Plot.pdf"))
   geweke.plot(group_chain)
   dev.off()

   HTML(paste("<b>", grouping, "GEWEKE</b>"), file = paste(grouping, "Geweke.html"), append = FALSE)
   HTML(geweke.diag(group_chain), file = paste(grouping, "Geweke.html"), append = TRUE)

   # Summarise once; both tables are taken from the same result.
   group_summary <- summary(group_chain)

   # STATISTICS
   stats <- as.matrix(group_summary$statistics)
   items <- data.frame(names, stats)
   write.csv(items, paste(grouping, "Statistics.csv"), row.names = FALSE)

   # QUANTILES
   quants <- as.matrix(group_summary$quantiles)
   items <- data.frame(names, quants)
   write.csv(items, paste(grouping, "Quantiles.csv"), row.names = FALSE)

   invisible(NULL)
}

# Add one combined code per grouping variable: countrywave * 10 + variable.
data3 <- data.frame(
  data2,
  ctyurban = data2$countrywave * 10 + data2$urban,
  ctyfemale = data2$countrywave * 10 + data2$female,
  ctygradualreform = data2$countrywave * 10 + data2$gradualreform,
  ctyreligious = data2$countrywave * 10 + data2$religious
)

# One entry per grouping variable:
#   column - combined-code column in data3
#   levels - labels of the two values of the variable, in ascending code order
#   title  - prefix of the output files written by aggregate()
groupings <- list(
  list(column = "ctyurban",         levels = c("Rural", "Urban"), title = "Group Urban-Rural"),     # BY URBAN/RURAL
  list(column = "ctyfemale",        levels = c("Male", "Female"), title = "Group Female-Male"),     # BY FEMALE/MALE
  list(column = "ctygradualreform", levels = c("Cons", "Pros"),   title = "Group Gradual Reform"),  # BY GRADUAL REFORM
  list(column = "ctyreligious",     levels = c("No", "Yes"),      title = "Group Religiosity")      # BY RELIGIOSITY
)

for (spec in groupings) {
  group_values <- data3[[spec$column]]

  # The codes are taken from the grouping's own column rather than borrowed
  # from another variable, so labels and codes cannot drift apart unnoticed.
  group_codes <- sort(unique(group_values))

  # "Algeria2 Rural", "Algeria2 Urban", "Algeria3 Rural", ... : every
  # country-wave label paired with both level labels, in ascending code order.
  group_names <- paste(rep(countrynames, each = 2), spec$levels)

  aggregate(group_names, group_codes, group_values, spec$title)
}
